/*************************************************************************
 * The contents of this file are subject to the MYRICOM MYRINET          *
 * EXPRESS (MX) NETWORKING SOFTWARE AND DOCUMENTATION LICENSE (the       *
 * "License"); User may not use this file except in compliance with the  *
 * License.  The full text of the License can found in LICENSE.TXT       *
 *                                                                       *
 * Software distributed under the License is distributed on an "AS IS"   *
 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied.  See  *
 * the License for the specific language governing rights and            *
 * limitations under the License.                                        *
 *                                                                       *
 * Copyright 2005 by Myricom, Inc.  All rights reserved.                 *
 *************************************************************************/

#include <limits.h>
#include <stdlib.h>
#include <stdio.h>

#include "mx_auto_config.h"
#include "mxsmpi.h"
#include "myriexpress.h"
#include "mx_timing.h"
#include "mx__lib_types.h"
#include "mx__lib.h"
#include "mx_byteswap.h"
#include "mx__mcp_request_ring.h"
#include "mcp_events.h"
#include "mx_stbar.h"
#include "mx__requests.h"
#include "mx__partner.h"
#include "mx__endpoint.h"

/*
 * qsort() comparison callback for arrays of double, ascending order.
 *
 * Returns 1 when *ap is greater than *bp, 0 when the two compare equal,
 * and -1 in every other case.
 */
static int
double_cmp(const void *ap, const void *bp)
{
  const double lhs = *(const double *) ap;
  const double rhs = *(const double *) bp;

  if (lhs > rhs)
    return 1;
  if (lhs == rhs)
    return 0;
  return -1;
}

/*
 * Busy-poll the endpoint's event queue, consuming events one at a time,
 * and return as soon as an MX_MCP_UEVT_RECV_TINY event has been consumed.
 *
 * MX_MCP_UEVT_DONE_SUCCESS and MX_MCP_UEVT_RECV_TRUC events are consumed
 * and otherwise ignored.  Any other event type is printed and reported
 * through mx_fatal().
 *
 * ep: endpoint whose event queue cursor (eventq_uevt / eventq_index) and
 *     flow counters (eventq_flow / event_count) are advanced in place.
 */
static inline void raw_recv(struct mx_endpoint *ep)
{
  mcp_uevt_t *mcp_event;
  int received = 0;
  int type;

  do {
    /* Look at the slot the queue cursor currently points to. */
    mcp_event = (mcp_uevt_t *) ep->eventq_uevt;
    /* Spin until the slot's type field becomes non-zero.  The "memory"
       clobber forces the compiler to re-read the field on every pass. */
    while (!(type = mcp_event->basic.type)) {
#if MX_CPU_x86
      /* "rep; nop" is the encoding of the x86 PAUSE spin-wait hint. */
      asm volatile("\trep ;nop\n" ::: "memory");
#else
      asm volatile("" ::: "memory");
#endif
    }
    switch (type) {
    case MX_MCP_UEVT_RECV_TINY:
      /* The message this function is waiting for: leave after cleanup. */
      received = 1;
      break;
    case MX_MCP_UEVT_DONE_SUCCESS:
      /* Consumed without further action. */
      break;
    case MX_MCP_UEVT_RECV_TRUC:
      /* ignore ack */
      break;
    default:
      mx_printf("type=%d\n", type);
      mx_fatal("unknown type\n");
    }
    /* Zero the type so the polling loop above waits again the next time
       the cursor comes around to this slot. */
    mcp_event->basic.type = 0;
    ep->eventq_uevt++;
    ep->eventq_index++;
    ep->eventq_flow++;
    ep->event_count++;
    /* Once 32 events have accumulated, store the running eventq_flow count
       (converted with htonl) through ep->flow and restart the batch count.
       NOTE(review): presumably this tells the NIC how many event slots the
       host has consumed -- confirm against the MCP interface. */
    if (ep->event_count >= 32) {
      *ep->flow = htonl(ep->eventq_flow);
      ep->event_count = 0;
    }
    
    /* Wrap the cursor back to the start of the queue once the index reaches
       the number of mcp_uevt_t entries that fit in eventq_length bytes. */
    if (ep->eventq_index == (ep->eventq_length / sizeof (mcp_uevt_t))) {
      ep->eventq_uevt = (mcp_uevt_t*)ep->eventq;
      ep->eventq_index = 0;
    }
  } while (!received);
}


/*
 * Post a zero-length "tiny" send request addressed to `partner`.
 *
 * The request is written into the request-ring slot at the current tail,
 * and the tail is advanced by one (wrapped with the ring mask) before the
 * request is posted.  Match info, sequence number and cookie are all 0 and
 * no payload is attached.  Nothing here checks whether the slot is free.
 */
static inline void
raw_send(struct mx_endpoint *ep, struct mx__partner *partner)
{
  mcp_ureq_t *slot;

  /* Grab the slot first; the tail update below must not affect it. */
  slot = ep->req_ring->base + ep->req_ring->tail;
  ep->req_ring->tail = (ep->req_ring->tail + 1) & ep->req_ring->mask;

  mx__post_ureq_tiny(ep->is_ze, slot, partner,
                     0,      /* match  */
                     0,      /* length */
                     0,      /* seqnum */
                     0,      /* cookie */
                     NULL);  /* data   */
}


/*
 * Raw ping-pong latency benchmark between rank 0 and rank 1.
 *
 * Usage: <prog> [iterations]      (default 10; must be a decimal >= 1)
 *
 * After a one-off issend/irecv handshake, rank 0 performs `iter`
 * raw_send()/raw_recv() round trips, taking a cycle-counter timestamp
 * before each one and once more after the last.  Rank 1 echoes each
 * message back.  Rank 0 then prints the median, average, worst and best
 * round-trip time divided by two, in microseconds.
 *
 * Returns 0 on success, or EXIT_FAILURE when the iteration count is
 * invalid or the timing buffers cannot be allocated.
 *
 * NOTE(review): the code indexes peers[1] and peers[!rank], so it appears
 * to assume exactly two ranks; return values of the mx_* calls are not
 * checked.
 */
int main(int argc, char *argv[])
{
  struct mx_endpoint *ep;
  struct mx__partner *partner;
  mx_cycles_t *timings;
  double *dtimes;
  int rank;
  int i;
  int iter = 10;

  /* Parse and validate the iteration count.  A count below 1 would make
     the statistics index dtimes[-1], and INT_MAX would overflow iter+1.
     strtol() saturates to LONG_MAX on overflow, which is rejected too. */
  if (argc > 1) {
    char *end;
    long v = strtol(argv[1], &end, 10);

    if (end == argv[1] || *end != '\0' || v < 1 || v >= INT_MAX) {
      fprintf(stderr, "usage: %s [iterations >= 1]\n", argv[0]);
      return EXIT_FAILURE;
    }
    iter = (int) v;
  }

  /* dtimes holds one duration per iteration; timings needs one extra
     slot for the timestamp taken after the final round trip. */
  dtimes = calloc((size_t) iter, sizeof(dtimes[0]));
  timings = calloc((size_t) iter + 1, sizeof(timings[0]));
  if (dtimes == NULL || timings == NULL) {
    fprintf(stderr, "cannot allocate timing buffers for %d iterations\n",
	    iter);
    free(dtimes);
    free(timings);
    return EXIT_FAILURE;
  }

  mx_cycles_counter_init();
  MXSMPI_Init(&argc,&argv);
  MXSMPI_Comm_rank(MXSMPI_COMM_WORLD, &rank);
  ep = MXSMPI_COMM_WORLD->ep;

  /* try to synchronize a little bit with an issend: rank 0 posts a
     zero-length synchronous send to peer 1, every other rank posts the
     matching receive, and both sides wait for completion */
  {
    mx_segment_t seg = {0,0};
    mx_request_t req;
    uint32_t res;
    mx_status_t status;

    if (rank == 0)
      mx_issend(ep, &seg, 1, MXSMPI_COMM_WORLD->peers[1].addr, 1,0,&req);
    else
      mx_irecv(ep, &seg, 1, 1, MX_MATCH_MASK_NONE,0,&req);
    mx_wait(ep, &req, MX_INFINITE, &status, &res);
  }

  /* NOTE(review): on is_ze endpoints the request ring is forced down to
     four slots (mask 3); the reason is not visible in this file. */
  if (ep->is_ze)
    ep->req_ring->mask = 3;
  /* 300 ms pause before the raw traffic starts.
     NOTE(review): presumably lets the handshake traffic settle -- confirm. */
  usleep(300000);
  partner = mx__partner_from_addr(&MXSMPI_COMM_WORLD->peers[!rank].addr);
  printf("rank=%d, iter=%d\n", rank, iter);

  if (rank == 0) {
    /* Initiator: timestamp, ping, wait for the pong. */
    for (i = 0; i < iter; i++) {
      timings[i] = mx_get_cycles();
      raw_send(ep, partner);
      raw_recv(ep);
    }
    /* i == iter here: closing timestamp for the last round trip. */
    timings[i] = mx_get_cycles();
  } else if (rank == 1) {
    /* Responder: echo every ping back. */
    for (i = 0; i < iter; i++) {
      raw_recv(ep);
      raw_send(ep, partner);
    }
  }
  printf("end\n");

  if (rank == 0) {
    double avg;

    /* Per-iteration round-trip durations in seconds. */
    for (i = 0; i < iter; i++) {
      dtimes[i] = (timings[i+1] - timings[i])*mx_seconds_per_cycle();
    }
    avg = (timings[iter] - timings[0]) *mx_seconds_per_cycle() / iter;

    /* Sort ascending so best, median and worst can be read by index. */
    qsort(dtimes, iter, sizeof(dtimes[0]), double_cmp);
    /* Values are scaled to microseconds and halved (round trip -> one way). */
    printf("median,\tavg,\tworst,\tbest\n %.3f us\t%.3fus\t%.3fus\t%.3fus\n", 
	   dtimes[iter/2]*1e6/2,avg*1e6/2, dtimes[iter-1]*1e6/2, dtimes[0]*1e6/2);
  }

  free(dtimes);
  free(timings);
  return 0;
}
